library(sandwich)
library(mgcv)
library(tikzDevice)
library(reshape)
library(plyr)
library(survey)
load("dem.panel.RData")
source("panel-utils.R")

##########################
#### Weighting Models ####
##########################

## Group the variables here into relevant categories ##
base.vars <- paste(c("camp.length","deminc",
                     "base.poll*deminc","as.factor(year)",
                     "base.und*deminc", "office"
                     ), collapse = "+")
treat.hist <- paste(c("d.gone.neg.l1","d.gone.neg.l2",
                      #"neg.dem.l1", "neg.dem.l2",
                      "d.neg.frac.l3","week*camp.length"#,"as.factor(year)"
                      ),
                    collapse = "+")
covs <- paste(c("dem.polls.l1",
                "neg.rep.l1", "neg.rep.l2", "r.neg.frac.l3",
                "num.rep.l1",
                "num.dem.l1",
                "undother.l1"
                ), collapse = "+")
cov.names <- c(dem.polls.l1 = "Polls$\\,_{t-1}$",
               neg.rep.l1 = "R Neg$\\,_{t-1}$",
               neg.rep.l2 = "R Neg$\\,_{t-2}$",
               r.neg.frac.l3 = "R Neg Total$\\,_{t-3}$",
               r.neg.frac.l2 = "R Neg Total$\\,_{t-2}$",
               num.rep.l1 = "R Ads$\\,_{t-1}$",
               num.dem.l1 = "D Ads$\\,_{t-1}$",
               undother.l1 = "Undecided$\\,_{t-1}$"
               )
n.cov.hist <- paste(c("s(dem.polls.l1, k = 3)",
                      "neg.rep.l1",
                      "neg.rep.l2", "r.neg.frac.l3",
                      "num.rep.l1",
                      "num.dem.l1",
                      "undother.l1"), collapse = "+")


i.cov.hist <- paste(c("neg.rep.l1",
                      #"I(sqrt(num.rep.l1))", #"undother.l1",
                      "neg.rep.l2", #"num.rep.l2",
                      "r.neg.frac.l2",
                      "s(dem.polls.l1)"
                      ), collapse = "+")

i.denom <- as.formula(paste("d.gone.neg ~ ",
                            paste(treat.hist, base.vars,
                                  i.cov.hist, sep = "+")))
n.denom <- as.formula(paste("d.gone.neg ~ ",
                            paste(treat.hist, base.vars,
                                  n.cov.hist, sep = "+")))

numer <- as.formula(paste("d.gone.neg ~ ",
                          paste(treat.hist, base.vars,
                                sep = "+")))

## non.comp are the extremely uncompetitive races that fail common
## support from baseline. Thus, this is an analysis is for the
## population of reasonably competive races.

non.comp <- unique(subset(dem.panel, base.poll > 70 | base.poll < 30
                          | is.na(base.poll) ,
                          "race"))[,1]
dem.panel <- subset(dem.panel, !(race %in% non.comp))

## Positive probability subset remove the weeks where there is either
## no chance of staying positive (too many ads to not have negative
## ones), or no chance of going negative (race is too safe, or there
## are no ads)

dem.panel$pos.prob.subset <- (dem.panel$week !=0 &
                              dem.panel$week > dem.panel$first.week +2 &
                              dem.panel$num.dem != 0 )




dem.panel$dem.pscore <- NA
dem.panel$dem.marpscore <- NA
dem.panel$sw <- NA

dem.wmod.ni <- iptw(denominator = n.denom, numerator = numer,
                    data = dem.panel, id = "race", time = "week",
                    family = "binomial", subset = deminc == 0,
                    ones  = !dem.panel$pos.prob.subset,
                    bal.covs = strsplit(covs, "\\+")[[1]])

dem.wmod.inc <- iptw(denominator = i.denom, numerator = numer,
                    data = dem.panel, id = "race", time = "week",
                    family = "binomial", subset = deminc == 1,
                    ones  = !dem.panel$pos.prob.subset,
                    bal.covs = strsplit(covs, "\\+")[[1]])
summary(dem.wmod.ni)
summary(dem.wmod.inc)
dem.panel$sw[dem.panel$deminc == 1] <- dem.wmod.inc$sw
dem.panel$sw[dem.panel$deminc == 0] <- dem.wmod.ni$sw
dem.panel$dem.pscore[dem.panel$deminc == 1] <- dem.wmod.inc$d.pscore
dem.panel$dem.pscore[dem.panel$deminc == 0] <- dem.wmod.ni$d.pscore



save(list = c("dem.panel", "dem.wmod.ni", "dem.wmod.inc", "base.vars",
       "treat.hist","i.cov.hist", "n.cov.hist",
       "n.denom", "numer", "i.denom", "covs"),
     file = "sensitivity-data-dem.RData")

dem.final <- dem.panel[dem.panel$week == -1, ]
dem.final <- dem.final[!is.na(dem.final$sw),]
dem.panel <- transform(dem.panel, log.sw = log(sw))
week.sum <- cast(na.omit(melt(dem.panel, id = c("race","week"),
                              measure=c("sw"))), week ~ ., summary)
week.sum
log.week.sum <- cast(na.omit(melt(dem.panel, id = c("race","week"),
                                  measure=c("log.sw"))), week ~ ., summary)
log.week.sum


par(mar = c(4.2,4,1,1) + .1, cex.main = .72, cex.axis = 0.72, cex.lab = 0.72)
subtle.boxplot(log.week.sum, xlab = "Weeks out from election", ylab =
               "Natural log of stabilized weights")
abline(h = 0, col = rgb(0.25, 0.25, 0.25))

##############
## Figure 6 ##
##############
par(mar = c(4.2,4,1,1) + .1, cex.main = .72, cex.axis = 0.72, cex.lab = 0.72)
subtle.boxplot(week.sum, xlab = "Weeks out from election", ylab =
               "Stabilized weights")
abline(h = 1, col = rgb(0.25, 0.25, 0.25))



##############
## Figure 5 ##
##############

## Balance plots
par(mfrow = c(1,2), mar = c(2.2,3.5,2,1.2) + 0.1, cex.main = .72,
    cex.axis = 0.72, cex.lab = 0.72)
plot(x = c(rep(0, times = nrow(dem.wmod.ni$balance)),
           rep(1, times = nrow(dem.wmod.ni$balance))),
     y = c(dem.wmod.ni$balance[,"USCoef"], dem.wmod.ni$balance[,"WSCoef"]),
     pch = 19, col= "grey", xlim = c(-0.75,1), axes = FALSE,
     ylab = "Standardized Coefficients", xlab = "", main =
     "Non-incumbent balance", ylim =
     c(-4,4))
axis(side = 1, at = c(0,1), labels = c("Unweighted",
                              "Weighted"))
axis(side = 2, las = 2)
polygon(x=c(-5,5,5,-5), y = c(2,2,-2,-2), col = rgb(0.8,0.8,0.8, alpha
                                            = .25), border = NA)
segments(x0 = 0, x1 = 1, y0 = dem.wmod.ni$balance[,"USCoef"],
         y1 = dem.wmod.ni$balance[,"WSCoef"])

text(x = 0, y = dem.wmod.ni$balance[,"USCoef"],
     cov.names[rownames(dem.wmod.ni$balance)], pos = 2,
     cex = 0.5)
plot(x = c(rep(0, times = nrow(dem.wmod.inc$balance)),
           rep(1, times = nrow(dem.wmod.inc$balance))),
     y = c(dem.wmod.inc$balance[,"USCoef"],
       dem.wmod.inc$balance[,"WSCoef"]),
     pch = 19, col= "grey", xlim = c(-0.75,1), axes = FALSE,
     ylab = "Standardized Coefficients", xlab = "", main =
     "Incumbent balance", ylim =
     c(-4,4))
axis(side = 1, at = c(0,1), labels = c("Unweighted",
                              "Weighted"))
axis(side = 2, las = 2, ps = 4)

polygon(x=c(-5,5,5,-5), y = c(2,2,-2,-2), col = rgb(0.8,0.8,0.8, alpha
                                            = .25), border = NA)

segments(x0 = 0, x1 = 1, y0 = dem.wmod.inc$balance[,"USCoef"],
         y1 = dem.wmod.inc$balance[,"WSCoef"])

ypostext <- sort(dem.wmod.inc$balance[,"USCoef"])
text(x = 0, y = c(1, 0.2, 0.8, 0.9, 1.3, 1.3,1,1) * ypostext,
     cov.names[rownames(dem.wmod.inc$balance)[order(dem.wmod.inc$balance[,"USCoef"])]], pos =
     2, cex = 0.5)





##############
## Figure 3 ##
##############

## Plot the nonlinear relationship between polls and going negative

## set things at their means/medians
consts <- with(model.frame(dem.wmod.ni$d.mod),
               c(1,mean(d.gone.neg.l1),
                 mean(d.gone.neg.l2),mean(d.neg.frac.l3) ,
                 mean(week),mean(camp.length),0, mean(base.poll), 0,0,0,
                 mean(base.und), median(office), mean(neg.rep.l1),
                 mean(neg.rep.l2), mean(r.neg.frac.l3), mean(num.rep.l1), mean(num.dem.l1),
                 mean(undother.l1), mean(week)*mean(camp.length)))

const <- sum(consts *dem.wmod.ni$d.mod$coef[1:length(consts)])

par(mar = c(4.2,4,1,1) + .1, cex.main = .72, cex.axis = 0.72, cex.lab = 0.72)
plot(dem.wmod.ni$d.mod, shade=TRUE,
     xlab="Democratic percent of the polls at $t-1$",
     ylab="Predicted probability of going negative",
     trans=function(x) 1/(1+exp(-(x))),
     ylim=c(-5,5), xlim = c(25, 65),
     yaxt="n", xaxt="n", bty="n" , sel=1, shade.col = rgb(0.1, 0.1, 0.1,
                                            alpha = 0.25))
axis(1)
axis(2, las=2)







#######################
### Analysis Models ###
#######################

## First, I perform the regressions (iptw, naive, and adjusted),
## switching the incumbent indicator to get the effect for each
## group. The SEs for the IPTW are asympototically correct, but I
## bootstrap the SEs below just to be sure.

adj.vars <- paste(c("r.neg.frac", "poll.mean", "I(poll.mean^2)",
                    "I(d.total/10)", "I(r.total/10)"
                    ),
                  collapse = "+")

causal.des <- svydesign(ids = ~ race, weights = ~ sw, data =
                        dem.final[!is.na(dem.final$sw),])

## Calculating effects for non-incumbents

## Formula for IPTW and Naive: only baseline covariates
share.form <- paste("demprcnt ~ ",
                    "deminc * d.neg.rec + deminc * I(d.neg.dur - d.neg.rec)+as.factor(year)+",
                    base.vars)

share.form <- as.formula(share.form)

## Formula for Adjusted: add in time-varying confounders
share.adj.form <- paste("demprcnt ~ ",
                        "deminc * d.neg.rec + deminc * I(d.neg.dur - d.neg.rec)+",
                        base.vars, "+", adj.vars)
share.adj.form <- as.formula(share.adj.form)


causal.share <-svyglm(share.form, causal.des)
summary(causal.share)

naive.share <- lm(share.form, data = dem.final, subset = !is.na(dem.final$sw))
summary(naive.share)

adj.share <- lm(share.adj.form, data = dem.final, subset = !is.na(dem.final$sw))
summary(adj.share)

## incumbent vote share ##

inc.form <- paste("demprcnt ~ ",
                  "I(deminc == 0) * d.neg.rec   + I(deminc == 0) * I(d.neg.dur - d.neg.rec) +",
                  gsub("deminc", "I(deminc == 0)", base.vars))

inc.form <- as.formula(inc.form)

inc.adj.form <- paste("demprcnt ~",
                      "I(deminc == 0) * d.neg.rec + I(deminc == 0) * I(d.neg.dur - d.neg.rec) +",
                      gsub("deminc", "I(deminc == 0)", base.vars), "+", adj.vars)
inc.adj.form <- as.formula(inc.adj.form)


causal.share.inc <- svyglm(inc.form, causal.des)
summary(causal.share.inc)

naive.share.inc <- lm(inc.form, data = dem.final,
                      subset = !is.na(dem.final$sw))
summary(naive.share.inc)

adj.share.inc <- lm(inc.adj.form, data = dem.final,
                    subset = !is.na(dem.final$sw))
summary(adj.share.inc)



rbind(iptw = summary(causal.share)$coef[3,],
      naive = summary(naive.share)$coef[3,],
      adjust = summary(adj.share)$coef[3,])

rbind(iptw = summary(causal.share.inc)$coef[3,],
      naive = summary(naive.share.inc)$coef[3,],
      adjust = summary(adj.share.inc)$coef[3,])


save(list=ls(), file = "dem-workspace.RData")

#### BOOTSTRAPPED SEs ######
## bootstrap the race
sims <- 500
causal.sha.boots <- matrix(NA, sims, length(causal.share$coef))
causal.inc.boots <- matrix(NA, sims, length(causal.share.inc$coef))
colnames(causal.sha.boots) <- names(causal.share$coef)
colnames(causal.inc.boots) <- names(causal.share.inc$coef)
naive.sha.boots <- matrix(NA, sims, length(naive.share$coef))
naive.inc.boots <- matrix(NA, sims, length(naive.share.inc$coef))
colnames(naive.sha.boots) <- names(naive.share$coef)
colnames(naive.inc.boots) <- names(naive.share.inc$coef)
adj.sha.boots <- matrix(NA, sims, length(adj.share$coef))
adj.inc.boots <- matrix(NA, sims, length(adj.share.inc$coef))
colnames(adj.sha.boots) <- names(adj.share$coef)
colnames(adj.inc.boots) <- names(adj.share.inc$coef)

race.list <- unique(dem.final$race[!is.na(dem.final$sw)])
race.lengths <- c()
race.rows <- list()
for (i in 1:length(race.list)) {
  race.rows[[i]] <- rownames(dem.panel[which(dem.panel$race
                                             ==
                                             race.list[i]),])
  race.lengths <- c(race.lengths, nrow(dem.panel[which(dem.panel$race ==
                                                       race.list[i]),]))

}
names(race.rows) <- race.list
names(race.lengths) <- race.list
for (i in 1:sims) {
  cat(i, " ")
  if (i %% 10 == 0) cat("\n")
  races <- sample(race.list, replace = TRUE,
                  size = length(race.list))
  races <- as.character(races)
  star <- unlist(race.rows[races])
  bootid <- rep(1:length(races), times = race.lengths[races])
  dem.star <- dem.panel[star,]
  dem.star$bootid <- bootid

  dem.wmod.ni <- iptw(denominator = n.denom, numerator = numer,
                      data = dem.star, id = "bootid", time = "week",
                      family = "binomial", subset = deminc == 0,
                      ones  = !dem.star$pos.prob.subset,
                      bal.covs = strsplit(covs, "\\+")[[1]])

  dem.wmod.inc <- iptw(denominator = i.denom, numerator = numer,
                       data = dem.star, id = "bootid", time = "week",
                       family = "binomial", subset = deminc == 1,
                       ones  = !dem.star$pos.prob.subset,
                       bal.covs = strsplit(covs, "\\+")[[1]])
  dem.star$sw[dem.star$deminc == 1] <- dem.wmod.inc$sw
  dem.star$sw[dem.star$deminc == 0] <- dem.wmod.ni$sw
  dem.star$dem.pscore[dem.star$deminc == 1] <- dem.wmod.inc$d.pscore
  dem.star$dem.pscore[dem.star$deminc == 0] <- dem.wmod.ni$d.pscore

  dem.star <- dem.star[dem.star$week == -1,]
  dem.star <- dem.star[!is.na(dem.star$sw),]

  causal.des.star <- svydesign(ids = ~ bootid, weights = ~ sw,
                               data = dem.star)

  causal.sha.star <- svyglm(share.form, design = causal.des.star)
  causal.inc.star <- svyglm(inc.form, design = causal.des.star)


  causal.sha.boots[i,] <- causal.sha.star$coef
  causal.inc.boots[i,] <- causal.inc.star$coef

  naive.sha.star <- lm(share.form, data = dem.star,
                       subset = !is.na(dem.star$sw) & week == -1)
  naive.inc.star <- lm(inc.form, data = dem.star,
                       subset = !is.na(dem.star$sw) & week == -1)

  naive.sha.boots[i,] <- naive.sha.star$coef
  naive.inc.boots[i,] <- naive.inc.star$coef

  adj.sha.star <- lm(share.adj.form, data = dem.star,
                     subset = !is.na(dem.star$sw) & week == -1)
  adj.inc.star <- lm(inc.adj.form, data = dem.star,
                     subset = !is.na(dem.star$sw) & week == -1)


  adj.sha.boots[i,] <- adj.sha.star$coef
  adj.inc.boots[i,] <- adj.inc.star$coef

}

iptw.sha <- aaply(causal.sha.boots, 2, regTable)
iptw.inc <- aaply(causal.inc.boots, 2, regTable)
colnames(iptw.sha) <- c("Mean", "SE", "95% Lower Bound",
                        "95% Upper Bound")
colnames(iptw.inc) <- c("Mean", "SE", "95% Lower Bound",
                        "95% Upper Bound")

iptw.sha <- iptw.sha[colnames(causal.sha.boots),]
iptw.inc <- iptw.inc[colnames(causal.inc.boots),]

naive.sha <- aaply(naive.sha.boots, 2, regTable)
naive.inc <- aaply(naive.inc.boots, 2, regTable)
colnames(naive.sha) <- c("Mean", "SE", "95% Lower Bound",
                         "95% Upper Bound")
colnames(naive.inc) <- c("Mean", "SE", "95% Lower Bound",
                         "95% Upper Bound")
naive.sha <- naive.sha[colnames(naive.sha.boots),]
naive.inc <- naive.inc[colnames(naive.inc.boots),]



adj.sha <- aaply(adj.sha.boots, 2, regTable)
adj.inc <- aaply(adj.inc.boots, 2, regTable)
colnames(adj.sha) <- c("Mean", "SE", "95% Lower Bound",
                       "95% Upper Bound")
colnames(adj.inc) <- c("Mean", "SE", "95% Lower Bound",
                       "95% Upper Bound")

adj.sha <- adj.sha[colnames(adj.sha.boots),]
adj.inc <- adj.inc[colnames(adj.inc.boots),]

noninc.table <- rbind(iptw = iptw.sha["d.neg.rec",],
                      naive = naive.sha["d.neg.rec",],
                      adjust = adj.sha["d.neg.rec", ])

inc.table <- rbind(iptw = iptw.inc["d.neg.rec",],
                   naive = naive.inc["d.neg.rec", ],
                   adjust = adj.inc["d.neg.rec", ])


#############
## Table 1 ##
#############

options(digits = 4)
noninc.table
inc.table

save(list=c("noninc.table", "inc.table","causal.sha.boots","causal.inc.boots"), file = "tables.RData")




##############
## Figure 4 ##
##############

par(mar=c(4.5,4,2.5,1)+.1, cex.main = 1, cex.axis = 0.72,
    cex.lab = 0.72)
plot(x=NULL, y=NULL, xlim=c(-2,2), ylim=c(20,120), axes=FALSE, ylab="",
     xlab="Effect of a week of negative advertising on Democratic voteshare")
axis(side=1)

abline(v=0, lty=2, lwd=1, col="gray60")

points(x=inc.table[1,1], y=100, pch=19)
segments(x0=inc.table[1,3], x1=inc.table[1,4],
         y0=100, y1=100, lwd=2)


text(x=-2, y=110, "\\underline{Democrat incumbents}",pos=4, cex = .8)

text(x=1.2*inc.table[1,4], y=100, "Late Campaign",pos=4, cex = .72)

early.noninc <- regTable(causal.sha.boots[,4])
early.inc <- regTable(causal.inc.boots[,4])
points(x=early.inc[1], y=90, pch=19)
segments(x0=early.inc[3], x1=early.inc[4],
         y0=90, y1=90, lwd=2)



text(x = 1.2 * early.inc[4], y = 90,
     "Early Campaign",pos=4, cex = 0.72)

points(x=noninc.table[1,1], y=50, pch=19)
segments(x0=noninc.table[1,3], x1=noninc.table[1,4],
         y0=50, y1=50, lwd=2)

text(x=-2, y=60, "\\underline{Democrat nonincumbents}",
     cex = 0.8, pos =4)
text(x=.8 * noninc.table[1,3], y=50, "Late Campaign", pos=2, cex = .72)

points(x=early.noninc[1], y=40, pch=19)
segments(x0=early.noninc[3], x1=early.noninc[4],
         y0=40, y1=40, lwd=2)
                                        #text(x=noninc.early.svyci, y=40, c("[","]"))
text(x=0.95*early.noninc[3], y=40, "Early Campaign",
     pos=2, cex = 0.72)
